package com.codinghero.example.crawler4j;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import edu.uci.ics.crawler4j.crawler.CrawlConfig;
import edu.uci.ics.crawler4j.crawler.CrawlController;
import edu.uci.ics.crawler4j.fetcher.PageFetcher;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer;

/**
 * @author Yasser Ganjisaffar <lastname at gmail dot com>
 */

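/*
 * IMPORTANT: Make sure that you update the crawler4j.properties file and set
 * crawler.include_images to true.
 *
 * NOTE(review): main() in this file also enables binary content
 * programmatically through CrawlConfig; confirm whether the properties file
 * is still consulted by the crawler4j version in use.
 */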

/**
 * Command-line entry point that configures a crawler4j {@link CrawlController}
 * and starts an image crawl using {@code ImageCrawler} as the crawler class.
 *
 * <p>Expected arguments, in order:
 * <ol>
 * <li>{@code rootFolder} - folder that will contain intermediate crawl data</li>
 * <li>{@code numberOfCrawlers} - number of concurrent crawler threads (a positive integer)</li>
 * <li>{@code storageFolder} - folder for storing downloaded images</li>
 * </ol>
 */
public class ImageCrawlController {

        /**
         * Parses the command-line arguments, builds the crawl configuration,
         * registers the seed URLs and starts the crawl.
         *
         * <p>When fewer than three arguments are supplied, or when the crawler
         * count is not a positive integer, a usage message is printed and the
         * method returns without starting a crawl.
         *
         * @param args {@code rootFolder}, {@code numberOfCrawlers}, {@code storageFolder}
         * @throws Exception propagated from the crawler4j set-up and start calls
         */
        public static void main(String[] args) throws Exception {
                if (args.length < 3) {
                        printUsage();
                        return;
                }
                String rootFolder = args[0];
                String storageFolder = args[2];

                // Validate the thread count up front so that a typo on the command
                // line produces the usage help instead of a raw stack trace.
                int numberOfCrawlers;
                try {
                        numberOfCrawlers = Integer.parseInt(args[1]);
                } catch (NumberFormatException e) {
                        System.err.println("numberOfCrawlers must be an integer, but was: " + args[1]);
                        printUsage();
                        return;
                }
                if (numberOfCrawlers <= 0) {
                        System.err.println("numberOfCrawlers must be greater than zero, but was: " + numberOfCrawlers);
                        printUsage();
                        return;
                }

                CrawlConfig config = new CrawlConfig();

                config.setCrawlStorageFolder(rootFolder);

                /*
                 * Since images are binary content, we need to set this parameter to
                 * true to make sure they are included in the crawl.
                 */
                config.setIncludeBinaryContentInCrawling(true);

                // The same array is used both as the seed list and as the domain list
                // handed to ImageCrawler.configure below.
                String[] crawlDomains = new String[] { "http://slide.sports.sina.com.cn/o/slide_2_41318_44514.html#p=1" };

                PageFetcher pageFetcher = new PageFetcher(config);
                RobotstxtConfig robotstxtConfig = new RobotstxtConfig();
                RobotstxtServer robotstxtServer = new RobotstxtServer(robotstxtConfig, pageFetcher);
                CrawlController controller = new CrawlController(config, pageFetcher, robotstxtServer);
                for (String domain : crawlDomains) {
                        controller.addSeed(domain);
                }

                // Configure the crawler class before any crawler is started.
                // NOTE(review): presumably stores the domains and storage folder in
                // static state read by the crawler instances - confirm in ImageCrawler.
                ImageCrawler.configure(crawlDomains, storageFolder);

                controller.start(ImageCrawler.class, numberOfCrawlers);
        }

        /** Prints the list of required command-line parameters to standard output. */
        private static void printUsage() {
                System.out.println("Needed parameters: ");
                System.out.println("\t rootFolder (it will contain intermediate crawl data)");
                System.out.println("\t numberOfCrawlers (number of concurrent threads)");
                System.out.println("\t storageFolder (a folder for storing downloaded images)");
        }

}
